* if not already done: cd to the folder where this script is stored, which should have a "data" subfolder with the data and another subfolder "output" to store the results
* requires the user-written package estout, which provides eststo, esttab, and estadd (install with: ssc install estout)

********************
*** DEMOGRAPHICS ***

*** Table 1 (summary statistics)

* Restricted judge data: only summary statistics are available here (the raw
* data are disclosable only under NDA per IRB/FJC).
* The path uses a forward slash: Stata accepts it on every platform, whereas a
* backslash is a directory separator on Windows only.
use "data/USdemographics", clear // generated by ingestion script

* Confirm that age group 2 (55-65) is the median age group.
* Step 1: the age variables must carry exactly the labels expected below.
local agelabel1 "fraction age==45-55"
local agelabel2 "fraction age==55-65"
local agelabel3 "fraction age==65-75"
local agelabel4 "fraction age==over75"
forvalues g = 1/4 {
	local found : variable label age`g'
	assert "`found'" == "`agelabel`g''"
}
* Step 2: neither the groups below 55-65 nor the groups above it hold a
* majority, so age2 (55-65) contains the median.
* NOTE(review): the label of age5 is not checked; it is presumably the
* youngest bracket (below 45) -- confirm against the ingestion script.
assert age1+age5<.5 & age3+age4<.5

* Values carried over to Table 1: the label of the median age group and the
* first observation of gender1 (used below as the judges' value of "female").
local medianage: variable label age2
scalar Female = gender1[1]

* other (student and judge experimental data); builds Table 1 by participant type
* Paths use forward slashes so that the script also runs outside Windows.
use "data/studentjudge_maindata.dta", clear
assert precedent!="besic":precedent // Besic participants should have been dropped during ingestion and would throw an error in analysis; double-checking here
replace confidence = confidence/100 // rescale from percent to a fraction
gen reasons = 1 if !mi(judgmentreasons) // nonmissing only where reasons were given, so that (count) counts them
* NOTE(review): every gender value other than "female" (including the "na"
* values that the gender check further below recodes to missing) counts as 0
* here, unlike threeL, which stays missing when classyear is missing -- confirm
* that this is intended for the student row of the table.
gen female = gender=="female"
gen threeL = classyear=="3L" if !mi(classyear)

* One row per participant type: counts, means, and the median sentence.
collapse (count) N=guilty reasons sentence (mean) guilty doctime female threeL confidence knowICL recognize (median) msentence=sentence, by(type)

* Judge demographics come from the restricted summary statistics loaded above.
* scalar() makes sure the scalar is used even if a variable with a matching
* name or abbreviation exists.
replace female = scalar(Female) if type=="judge":type
gen Age = "`medianage'" if type=="judge":type

label var N "N"
label var reasons "     with reasons"
label var sentence "     with sentence"
label var doctime "Time spent"
label var female "Female"
label var threeL "Fraction 3Ls (vs. 2Ls)"
label var confidence "Confidence in decision"
label var knowICL "Prior knowledge of int'l criminal law"
label var recognize "Recognized names and places in the case"
label var guilty "Affirmed"
label var msentence "Median sentence (years)"

* Full variable names (no abbreviations) so that this also works with varabbrev off.
order type N reasons sentence female Age threeL knowICL recognize confidence doctime guilty msentence
export excel using "output/table1.xlsx", firstrow(varlabels) replace

/*** demographics of invitees (not reported -- see paper for reasons -- cannot be disclosed without permission from the FJC)
use "C:\Users\hspamann\OneDrive - Harvard University\Documents\Projects\Published\Experiment2\seminar_participating_judges\FJC_LS_participant_demographics_2015-2018.dta" if YearAttended==2015, clear // change to wherever you have the file, if you have it
gen female = Gender=="Female"
gen white = Race=="White"
label var experience "Experience"
label var Age "Age"
gen Democrat = AppointingPres=="Democratic" if !mi(AppointingPres)
label var Democrat "Democratic Appointee"
gen ArtIII = inlist(type,"Circuit","District")
recode ArtIII (0=2)
table (ArtIII type), stat(freq) stat(median Age experience) stat(mean female white Democrat) nformat(%3.2f mean) nformat(%2.0f median)
collect export output/inviteedemographics.docx, replace
*/

*** does gender play a role? (unreported)
* Regresses guilty on a male indicator, the two treatments, and the
* male-by-treatment interactions, then tests the interactions jointly.
* Paths use forward slashes so that the script also runs outside Windows.
eststo clear
use "data/studentjudge_maindata.dta", clear
label define female 1 "female" 0 "male"
encode gender, gen(female) label(female) // strings not in the label get the next free code
recode female (2=.) // "na" values
eststo gender: reg guilty 0.female i.nationality 0.female#nationality i.precedent 0.female#precedent, level(90)
test 0.female#2.nationality 0.female#2.precedent // joint test of both interactions
estadd scalar joint_p = r(p)
* NOTE(review): esttab builds the "ci" column with its own level() option
* (default: the current -set level-), not with the level(90) passed to
* regress above -- confirm which confidence level is wanted in the output.
esttab gender using "output/genderconfoundcheck.html", replace noomitted nostar b(%4.2f) ci obslast stats(N joint_p, labels("N" "joint p for female interactions") fmt(%3.0f %5.3f))


********************************
**** ANALYSIS ******************

foreach dataversion in "" "_JLS" { // runs the analysis with the judge data as described in the paper (""), or alternatively with those from Spamann & Kloehn 2016 ("_JLS")

eststo clear

* Load the main data. For the "_JLS" version, the judge observations are
* replaced by those used in Spamann & Kloehn JLS 2016.
* Paths use forward slashes so that the script also runs outside Windows.
use "data/studentjudge_maindata.dta", clear
if "`dataversion'" == "_JLS" {
	drop knowICL recognize // these aren't needed and are differently encoded in the SK 2016 data
	append using "data/FJC_April2015_nopersonal.dta", gen(JLS) // these are the data used in Spamann & Kloehn JLS 2016; JLS is 0 for the main data and 1 for the appended data
	duplicates report randomID // informational only: shows whether any randomID occurs more than once
	drop if type=="judge":type & JLS==0 // remove the judges of the main data
	replace type="judge":type if mi(type) & JLS==1 // appended observations without a type are marked as judges
	drop if mi(guilty) // those were left in the SK 2016 data
	}

*** Table 2: univariate: proportion guilty for each precedent-nationality cell (incl. unreported tests of differences by cell)
* Unreported: for each cell, the p-values of a t test and of a Fisher exact test of guilty by participant type.
table nationality precedent, command(r(p): ttest guilty, by(type)) command(r(p_exact): tab guilty type, exact) nformat(%4.3f)
* Reported table. The totals() option spells out the full variable name
* nationality instead of relying on a truncated abbreviation.
table (nationality) (type precedent), stat(mean guilty) stat(total guilty) stat(freq) totals(type type#precedent type#nationality) nformat(%3.2f mean)
collect export output/table2`dataversion'.docx, replace // note that defendant order is inverse of that in the paper

*** univariate tests, run separately for each participant group
* NB: only Fisher exact tests are run here - Boschloo unconditional tests can't be done in Stata but on http://www4.stat.ncsu.edu/~boos/exact/ or in R using the package "Exact"
foreach group of numlist 1 2 { // the two numeric codes of the type variable
	* cell-level summary: number guilty, cell size, and proportion guilty
	table nationality precedent if type==`group', ///
		statistic(total guilty) statistic(freq) statistic(mean guilty) ///
		nformat(%3.2f mean)
	* Fisher exact test of each treatment against the decision
	tabulate nationality guilty if type==`group', exact
	tabulate precedent guilty if type==`group', exact
}

*** Table 3: regressions
* The same specification is estimated by OLS (reg) and by logit: guilty on the
* two treatments, the type indicator, and the type-by-treatment interactions.
* Each model is stored under the name of its estimation command.
foreach regression in reg logit {
	eststo `regression': `regression' guilty 2.precedent 2.nationality 2.type 2.type#2.precedent 2.type#2.nationality, level(95)
	test 2.type#2.nationality 2.type#2.precedent // joint test of the two interactions
	estadd scalar joint_p = r(p)
}
* eform(0 1) exponentiates the coefficients of the second model (logit) only.
* The output path uses a forward slash so that the script also runs outside Windows.
esttab reg logit using "output/table3_wo_exact`dataversion'.html", replace noomitted nostar b(%4.2f) ci obslast eform(0 1) stats(N joint_p, labels("N" "joint p for student interactions") fmt(%3.0f %5.3f)) nolines

* exact logistic regression (displayed directly rather than through esttab because the confidence intervals seem to get messed up there)
* exlogistic is given plain 0/1 indicators, with the interactions built by hand
generate student         = type == "student":type
generate unsympathetic   = nationality == "unsympathetic":nationality
generate reverse         = precedent == 2
generate studentXunsympa = student * unsympathetic
generate studentXreverse = student * reverse
* the two treatment indicators are conditioned out; term t1 groups both interactions
eststo exact: exlogistic guilty student studentXunsympa studentXreverse, ///
	condvars(unsympathetic reverse) ///
	terms(t1= studentXunsympa studentXreverse) ///
	memory(2g) level(95) nolog
estimates replay, test(score)


*** placebo samples

* bootstrap sample approach (doesn't respect cell sizes, hence not used in paper)
* For each treatment, the Fisher exact p-value of the judge sample is compared
* with the Fisher p-values of bootstrap samples of students, each drawn with
* as many observations as there are judges.
* NOTE(review): no random-number seed is set, so the fractions displayed below
* will differ from run to run.
local reps = 10000
count if type=="judge":type
scalar judge_N = r(N)

foreach var in prec nat {
	* benchmark: Fisher p-value in the judge sample
	tabulate guilty `var' if type=="judge":type, exact
	scalar p_`var' = r(p_exact)

	preserve
		keep if type=="student":type
		tempfile bsresults
		bootstrap r(p_exact), reps(`reps') size(judge_N) saving(`bsresults') nodots notable: tab guilty `var', exact
		use `bsresults', clear
		* for nat, smaller p-values than the judges' are counted; for prec, larger ones
		if "`var'"=="nat" {
			count if _bs_1 < p_`var'
		}
		else {
			count if _bs_1 > p_`var'
		}
		display "fraction more extreme Fisher p's for `var': " r(N)/`reps'
	restore
}

* analytic approach (reported in paper)
* First step: collect the test statistics and cell sizes of the actual judge sample.
preserve
	keep if type=="judge":type

	* Per treatment: two-sided and one-sided Fisher p-values (the test
	* statistics) and the difference in the proportion guilty between the
	* two treatment groups.
	foreach treatment in nati prec {
		quietly tabulate guilty `treatment', exact
		scalar P_`treatment'  = r(p_exact)
		scalar P1_`treatment' = r(p1_exact)
		quietly ttest guilty, by(`treatment')
		scalar B_`treatment'  = r(mu_1) - r(mu_2)
	}

	* Cell sizes: the first digit indexes nationality, the second precedent.
	foreach n of numlist 1 2 {
		foreach pr of numlist 1 2 {
			count if nationality==`n' & precedent==`pr'
			scalar N`n'`pr' = r(N)
		}
	}

	* Marginal sizes: N1_ and N2_ by nationality, N_1 and N_2 by precedent.
	scalar N1_ = N11+N12
	scalar N2_ = N21+N22
	scalar N_1 = N11+N21
	scalar N_2 = N12+N22

	* Consistency checks: both sets of margins add up to the full judge sample.
	assert N_1 + N_2 == N1_ + N2_
	assert N_1 + N_2 == _N
restore

* Student sample: estimated probabilities (i.e., sample proportions) of drawing
* a "success" in each treatment cell. The first digit of the scalar name
* indexes nationality, the second precedent. Restricting each summarize to
* the students leaves the data in memory untouched.
forvalues n = 1/2 {
	forvalues pr = 1/2 {
		summarize guilty if type=="student":type & nationality==`n' & precedent==`pr', meanonly
		scalar p`n'`pr' = r(mean)
	}
}

* Enumerate, for each treatment cell, the probability of drawing each possible
* number of successes (0 up to the judge cell size) when the success
* probability is the student proportion in that cell. Each distribution is
* saved to its own tempfile and combined below. The data in memory are
* restored afterwards.
preserve
	forvalues nati=1/2 {
	forvalues prec=1/2 {
		clear
		set obs `=N`nati'`prec'+1'
		* int rather than byte: a byte cannot hold counts above 100
		gen int t = _n-1
		* double rather than the default float, to keep the full precision of the probabilities
		gen double p = binomialp(N`nati'`prec',t,p`nati'`prec')
		rename (t p) =`nati'`prec'
		tempfile students`nati'`prec'
		save `students`nati'`prec''
	}
	}
restore


* Cross the individual cell distributions to get the joint distribution of the
* numbers of successes in the four cells, then reduce it to the distribution
* of the marginal totals.
clear
forvalues nati=1/2 {
	forvalues prec=1/2 {
		cross using `students`nati'`prec''
	}
}
* We only care about the joint probability; it is stored as double so that
* the product is not rounded to float precision.
gen double p = p11*p12*p21*p22
* We only look at the marginal totals for each treatment separately. They are
* stored as int because a byte cannot hold totals above 100.
gen int t_1 = t11+t21 // successes with precedent==1
gen int t_2 = t12+t22 // successes with precedent==2
gen int t1_ = t11+t12 // successes with nationality==1
gen int t2_ = t21+t22 // successes with nationality==2
collapse (sum) p, by(t_1 t_2 t1_ t2_)
sum p, meanonly
assert float(r(sum))==1 // check for errors: the probabilities must sum to one

* For every possible combination of marginal totals, compute the effect size
* (b_...) and the Fisher p-values (p_... two-sided, p1_... one-sided), and then
* add up the probabilities p of all combinations that are at least as extreme
* as the actual judge data (P_..., P1_..., B_...).
* b_... and the one-sided p-values are to handle the problem that the judge
* precedent effect goes in the "wrong" direction.
* All results are stored as double so that nothing is rounded on storage.
gen double b_nati = t1_/N1_ - t2_/N2_
gen double b_prec = t_1/N_1 - t_2/N_2
gen double p_nati = .
gen double p_prec = .
gen double p1_nati = .
gen double p1_prec = .
forvalues i=1/`=_N' {
	* The exact option is required: without it tabi does not compute Fisher's
	* exact test and r(p_exact) and r(p1_exact) would be missing.
	qui tabi `=t1_[`i']' `=N1_-t1_[`i']' \ `=t2_[`i']' `=N2_-t2_[`i']', exact
	qui replace p_nati=r(p_exact) in `i'
	qui replace p1_nati=r(p1_exact) in `i'
	qui tabi `=t_1[`i']' `=N_1-t_1[`i']' \ `=t_2[`i']' `=N_2-t_2[`i']', exact
	qui replace p_prec=r(p_exact) in `i'
	qui replace p1_prec=r(p1_exact) in `i'
}

* In the comparisons below, both sides are rounded with float() so that
* combinations whose Fisher p-value equals the judges' p-value (including the
* judges' own outcome) are counted as ties and are not dropped because of
* rounding differences in the last digits.
* p is the probability of a particular combination of marginal totals, p_...
* is the Fisher p-value for that combination, and P_... is the corresponding
* Fisher p-value from the actual data.

* two-sided
qui sum p if float(p_nati)<=float(P_nati)
di r(sum)
qui sum p if float(p_prec)<=float(P_prec)
di r(sum)

* two-sided, same sign
qui sum p if float(p_nati)<=float(P_nati) & sign(b_nati)==sign(B_nati)
di r(sum)
qui sum p if float(p_prec)<=float(P_prec) & sign(b_prec)==sign(B_prec)
di r(sum)

* one-sided
qui sum p if float(p1_nati)<=float(P1_nati) & sign(b_nati)==sign(B_nati)
di r(sum)
qui sum p if float(p1_prec)<=float(P1_prec) & sign(b_prec)==sign(B_prec)
di r(sum)

}